Construction of Knowledge Graph


In [1]:
import pandas as pd

def convert_str(a):
    '''
    Accepts: String in form of a-b, returns it literally.
    '''
    a,b=a[0],a[-1]
    result = 0
    if a == 0:
        result = -1
    elif b == 0:
        result = 1
    else:
        result = 0
    return int(a)-int(b) + result  #extra points for good defence


df = pd.read_csv('fd.csv')
df.index = df[df.columns[0]]
df.drop(df.columns[0],inplace=True,axis=1)

## Knowledge graph construction starts

import numpy as np
graph = list(np.ones((20,20)))

for i in range(0,19):
    for j in range(i+1,20):
        graph[i][j] = convert_str(df.iloc[i,j])
        graph[j][i] = convert_str(df.iloc[j,i])
        
df2 = pd.read_csv('fd2015.csv')
df2.index = df2[df2.columns[0]]
df2.drop(df2.columns[0],inplace=True,axis=1)

for i in range(0,19):
    for j in range(i+1,20):
        if df2.columns[i] in df.columns and df2.columns[j] in df.columns:
            graph[i][j] = float((graph[i][j] + convert_str(df2.iloc[i,j])))/2.0
            graph[j][i] = float((graph[j][i] + convert_str(df2.iloc[j,i])))/2.0

df3 = pd.read_csv('fd2016.csv')
df3.index = df3[df3.columns[0]]
df3.drop(df3.columns[0],inplace=True,axis=1)

for i in range(0,19):
    for j in range(i+1,20):
        if df3.columns[i] in df.columns and df3.columns[j] in df.columns:
            graph[i][j] = float((graph[i][j] + convert_str(df3.iloc[i,j])))/2.0
            graph[j][i] = float((graph[j][i] + convert_str(df3.iloc[j,i])))/2.0

graph = np.array(graph)

Taking Decision


In [2]:
max_margin = np.max(graph)
min_margin = np.min(graph)

established_total_weights = [sum(graph[j]) for j in range(20)]

def uniform_distri(x,max_margin,min_margin):
    return float((x-min_margin))/float((max_margin-min_margin))

def intimidation_factor(team1_str,team2_str):
    total_weights = [sum(graph[j]) for j in range(0,20)]
    min_tot = min(total_weights)
    max_tot = max(total_weights)
    id1 = list(df.columns.values).index(team1_str)
    id2 = list(df.columns.values).index(team2_str)
    return abs(float(total_weights[id1]-total_weights[id2])/(max_tot-min_tot))
    

def who_will_win(team1_str,team2_str):
    id1 = list(df.columns.values).index(team1_str)
    id2 = list(df.columns.values).index(team2_str)
    value = graph[id1][id2]
    past_result = uniform_distri(value,max_margin,min_margin)
    intimidation_result = intimidation_factor(team1_str,team2_str)
    result = 0.48 * intimidation_result + 0.52 * past_result 
    return result


def choose_two_teams(avaliable_players):
    team1 = np.random.choice(avaliable_players)
    team2 = team1
    while team2 == team1:
        team2 = np.random.choice(avaliable_players)
    return team1,team2

def simulate_tournament():
    avaliable_players = list(df.columns.values)
    while len(avaliable_players)!=1:
        team1,team2 = choose_two_teams(avaliable_players)
        id1 = avaliable_players.index(team1)
        id2 = avaliable_players.index(team2)
        result = who_will_win(team1,team2)
        if result >0.5:
            del avaliable_players[id2]
        else:
            del avaliable_players[id1]
    return avaliable_players[0]

from collections import Counter
def find_hot_favorites():
    tournament_wins = []
    for i in range(2000):
        tournament_wins.append(simulate_tournament())
    outcome = dict(Counter(tournament_wins))
    actual_outcome = sorted(outcome.items(), key=lambda x: (-x[1], x[0]))
    return actual_outcome[0][0],actual_outcome[1][0],actual_outcome[2][0]

In [3]:
find_hot_favorites()


Out[3]:
('Chelsea', 'Manchester City', 'Arsenal')